* Session setup: start from an empty workspace and turn off output paging so
* the script runs without waiting for keypresses.
clear all
set more off

* -set memory- and -set matsize- are legacy settings. Memory is managed
* automatically from Stata 12 onward and matsize is ignored from Stata 16
* onward, so each is only issued on releases where it still has an effect.
* c(stata_version) is the running executable's version (unaffected by -version-).
if c(stata_version) < 12 {
	set mem 10000000
}
if c(stata_version) < 16 {
	* NOTE(review): 10000 exceeds the matsize ceiling of some older
	* editions (e.g. Stata/IC) -- confirm the edition this is run on.
	set matsize 10000
}

**************************************
*** SHRUG build + merge **************
**************************************

** Set file paths
	// paths.do is expected to define the globals used below ($shrug, $panel);
	// the global path_code must already be set by the caller before this runs.
if `"$path_code"' == "" {
	display as error "global path_code is not defined; set it to the code directory before running this file"
	exit 198
}
do "$path_code/paths.do"

********************************************************************************
********************************************************************************

** Step 1: Turn the SHRUG 2001 PCA key file into a numeric village crosswalk
{
	// load the rural PC01 key file and confirm that the pc01_* variables
	// jointly identify every row (-unique- is not defined in this file;
	// presumably the user-written SSC command -- confirm it is installed)
use "$shrug/shrug-v1.4.samosa-keys-dta/shrug_pc01r_key.dta", clear
unique pc01_*
assert r(N)==r(unique)

	// flag rows whose district ID is blank (tested while IDs are still strings)
generate missing = (pc01_district_id=="")
tabulate pc01_state_id missing

	// tag how many other rows share each shrid and cross it with the blank flag
duplicates tag shrid, generate(dup_shrid)
tabulate dup_shrid missing

	// describe the two diagnostic variables created above
label variable dup_shrid "Number of duplicate shrids"
label variable missing "2001 (Sub)district IDs missing in SHRUG keys"

	// convert the IDs to numeric and switch to the short *_code names;
	// state and village codes must be non-missing after the conversion
destring pc01_*, replace
rename (pc01_state_id pc01_district_id pc01_subdistrict_id pc01_village_id) ///
	(st_code dt_code ta_code vi_code)
assert !(st_code==. | vi_code==.)

	// order rows by all variables, shrink storage types, and write the crosswalk
sort *
compress
save "$shrug/shrug_pca01_xwalk.dta", replace

}
	   
********************************************************************************
********************************************************************************

** Step 2: Collapse SHRUG auxiliary file to 2001 PCA identifiers
{
	// attach the SHRUG ancillary file to every crosswalk row sharing a shrid
use "$shrug/shrug_pca01_xwalk.dta", clear
merge m:1 shrid using "$shrug/shrug-v1.4.samosa-ancillary-dta/shrug_ancillary.dta"
tab st_code _merge

	// not much we can do with non-merges, hopefully it's not an issue going forward
keep if _merge==3
	// the matched rows must be unique on the keys used for the panel merge below
unique st_code dt_code vi_code
assert r(unique)==r(N)
	// NOTE(review): the st_code-dup_shrid range relies on the column order
	// saved in the crosswalk file -- confirm it covers the intended variables
keep st_code-dup_shrid secc_*
	// prefix change: every secc* variable becomes secc11*
rename secc* secc11*

	// merge in PCA 2001 unique identifiers (matched rows only)
merge 1:m st_code dt_code vi_code using "$panel/panel_dataset_full.dta", keepusing(pca01_id) keep(3) nogen
duplicates tag st_code dt_code vi_code, gen(dup)
	// print the duplicated keys to the log rather than opening the interactive
	// Data Browser, so the script can run unattended and the rows are recorded
list st_code dt_code vi_code pca01_id if dup>0 // 1 village, gonna leave it in there as is
drop dup


	// final dataset must be unique on pca01_id before it is saved
order pca01_id
sort *
unique pca01_id
assert r(unique)==r(N)
compress
save "$shrug/shrug_secc.dta", replace

}
	   
********************************************************************************
********************************************************************************
	   
	   